#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import requests

# NOTE: this string literal comes after the import statements, so it is a
# plain expression statement and not the module docstring (`__doc__`).
'a BeautifulSoup demo'

# Author tag of this script.
__author__ = 'manymore13'

# Page that main() downloads and parses.
URL = "https://movie.douban.com/top250"

def getPageHtml(url, timeout=10):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    headers = {
        # A browser-like User-Agent is sent instead of the requests default.
        'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Mobile Safari/537.36',
        # Optional headers, currently disabled:
        # 'Host':'movie.douban.com',
        # 'Connection':'keep-alive',
        # 'Cache-Control':'max-age=0',
        # 'Upgrade-Insecure-Requests':'1',
        # 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        # 'Accept-Language':'zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7',
        # 'Cookie':'_pk_ses.100001.4cf6=*',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the
    # HTML parser as if it were the movie list.
    response.raise_for_status()
    return response.content

def parseHtml(htmlStr):
    """Print the first title of every entry in the page's movie list.

    The movie list is the ``<ol class="grid_view">`` element; each ``<li>``
    inside it is one entry, and the first ``<span class="title">`` of the
    entry is printed.

    Args:
        htmlStr: HTML markup of the page (``str`` or ``bytes``).

    Returns:
        None. Titles are written to standard output, one per line. Nothing is
        printed when the page contains no movie list.
    """
    soup = BeautifulSoup(htmlStr, "html.parser")
    movieListTag = soup.find('ol', attrs={'class': 'grid_view'})
    if movieListTag is None:
        # find() returns None when the element is missing (e.g. an error or
        # login page was served); there is nothing to list in that case.
        return
    for tagli in movieListTag.find_all('li'):
        titleTag = tagli.find('span', attrs={'class': 'title'})
        if titleTag is None:
            # Skip list items that carry no title span instead of crashing.
            continue
        print(titleTag.getText())

def save(path, htmlStr):
    """Write ``htmlStr`` to ``path``, replacing any existing file.

    Args:
        path: Destination file path.
        htmlStr: Bytes-like content to store; the file is opened in binary
            mode, so ``str`` values must be encoded by the caller.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # The context manager guarantees the file is flushed and closed even if
    # write() raises. The previous code referenced ``f.close`` without calling
    # it, so the file was never explicitly closed.
    with open(path, 'wb') as f:
        f.write(htmlStr)

def main():
    """Fetch the page at URL and print the movie titles found in it."""
    raw_page = getPageHtml(URL)
    # Optional: call save('d:/douban.html', raw_page) to keep a local copy.
    parseHtml(raw_page)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()